###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file produces some baseline data builds :: unmatched CCES + recoded data objects
#  note: unmatched/unweighted is used in final paper/analysis
########################################################
# inputs :: JAH, UCM CCES modules from 2016 cycle
# outputs :: (un)matched data, stacked by candidate, candidate x respondent
# => cces_stacked in data/cces_stacked_unmatched.Rdata
# => cces_data in data/cces_data_unmatched.Rdata
# => voter_matrix in data/voter_matrix.csv
# => candidate_matrix in data/candidate_matrix.csv

##############################
# NOT RUN!!!!! GIVES DETAILS ON DATA RECODING FROM CCES
##############################

rm(list=ls())
library(foreign)

# ---- Matched build: read the two raw modules and add respondent-level flags ----
setwd("~/Dropbox/IdeologyPrimaryConjoints/merged/replication0/")
ucm <- read.spss('~/Dropbox/IdeologyPrimaryConjoints/ucm/matched/UCM_UMB_matched_merge.sav')
jah <- read.spss('~/Dropbox/IdeologyPrimaryConjoints/yale/matched/CCES16_JAH_OUTPUT_Feb2017.sav')
# Expected to provide screeenJAH and screeenUCM, which are used below.
load('~/Dropbox/IdeologyPrimaryConjoints/merged/replication0/data/screeners_matched_JAH_UCM.Rdata')

# For each respondent, the share of the module's variables whose value
# contains the text 'Skipped'. The accumulator starts as a 1-d array of zeros
# with one cell per respondent.
skip_share <- function(dat) {
  n.skipped <- Reduce(
    function(total, v) total + as.numeric(grepl(v, pattern = 'Skipped')),
    dat,
    array(0, length(dat[[1]]))
  )
  n.skipped / length(dat)
}

# Both shares are computed before any column is appended, so the denominator
# is the number of variables in the raw file.
jah_skip <- skip_share(jah)
ucm_skip <- skip_share(ucm)
jah$skipped <- jah_skip
ucm$skipped <- ucm_skip

# Attach the two screener columns to each module.
jah$screen1 <- screeenJAH[, 1]
jah$screen2 <- screeenJAH[, 2]
ucm$screen1 <- screeenUCM[, 1]
ucm$screen2 <- screeenUCM[, 2]

# Variable names after the flag columns have been appended.
jah_nms <- names(jah)
ucm_nms <- names(ucm)

# ix: positions of UCM respondents with a non-missing JAHconjoints value;
# only these rows of the UCM module are carried forward.
ix <- which(!is.na(ucm$JAHconjoints))
jah.id <- 1:length(jah[[1]])
ucm.id <- 1:length(ix)

# Names of the pre-wave conjoint variables (tasks 171-174), generated rather
# than listed by hand. Order:
#   1. party ID and conjoint bookkeeping variables
#   2. responses _a.._d, each across the four tasks
#   3. candidate attributes: candidate A/B within attribute within task
#   4. issue positions: issue 1/2 within candidate A/B within task
pre.var.names <- local({
  tasks <- c("171", "172", "173", "174")

  # expand.grid varies its first argument fastest, which gives the nesting
  # described above.
  attrs <- expand.grid(
    cand = c("A", "B"),
    trait = c("gender", "race", "religion", "occupation",
              "personality", "endorsements", "record"),
    task = tasks,
    stringsAsFactors = FALSE
  )
  issues <- expand.grid(
    issue = 1:2,
    cand = c("A", "B"),
    task = tasks,
    stringsAsFactors = FALSE
  )

  c(
    'pid3', 'pid7', 'JAHconjoints', 'JAHcq',
    paste0("JAH", rep(tasks, times = 4), "_", rep(c("a", "b", "c", "d"), each = 4)),
    # attributes
    paste0("JAH", attrs$trait, attrs$cand, "_", attrs$task),
    paste0("JAHissue", issues$issue, issues$cand, "_", issues$task)
  )
})


# Names of the post-wave conjoint variables (tasks 371-374), generated rather
# than listed by hand. Order:
#   1. party ID and post-wave conjoint bookkeeping variables
#   2. responses _a.._d, each across the four tasks
#   3. candidate attributes: candidate A/B within attribute within task
#   4. issue positions: issue 1/2 within candidate A/B within task
post.var.names <- local({
  tasks <- c("371", "372", "373", "374")

  # expand.grid varies its first argument fastest, which gives the nesting
  # described above.
  attrs <- expand.grid(
    cand = c("A", "B"),
    trait = c("gender", "race", "religion", "occupation",
              "personality", "endorsements", "record"),
    task = tasks,
    stringsAsFactors = FALSE
  )
  issues <- expand.grid(
    issue = 1:2,
    cand = c("A", "B"),
    task = tasks,
    stringsAsFactors = FALSE
  )

  c(
    'pid3', 'pid7', "JAHconjointsPost", "JAHcq_post",
    paste0("JAH", rep(tasks, times = 4), "_", rep(c("a", "b", "c", "d"), each = 4)),
    paste0("JAH", attrs$trait, attrs$cand, "_", attrs$task),
    paste0("JAHissue", issues$issue, issues$cand, "_", issues$task)
  )
})



# Covariates carried along with every conjoint record, picked by column
# position in the JAH module (after skipped/screen1/screen2 were appended).
# NOTE(review): positional selection breaks silently if the column layout of
# the .sav file changes; the same names are also looked up in the UCM module.
common.var.names <- names(jah)[c(137:488, 609:803)]

library(stringr)

# NOTE(review): the JAH matrices get 2 * n rows but are filled with n-length
# columns, so R recycles each column: every JAH respondent appears twice
# (rows i and n + i) and the two copies receive different ids below. Confirm
# this is intended before relying on JAH row counts.
n.jah.rows <- length(jah[[1]]) * 2
n.ucm.rows <- length(ix) * 1

# Character matrix with one column per name in `vars`, each column being the
# whitespace-trimmed values of that variable in `src`. When `keep` is given,
# only those positions of the variable are used.
fill_columns <- function(src, vars, n.rows, keep = NULL) {
  out <- matrix(NA, n.rows, length(vars))
  for (k in seq_along(vars)) {
    vals <- src[[which(names(src) == vars[k])]]
    if (!is.null(keep)) {
      vals <- vals[keep]
    }
    out[, k] <- as.character(str_trim(vals))
  }
  out
}

jah.common <- fill_columns(jah, common.var.names, n.jah.rows)
ucm.common <- fill_columns(ucm, common.var.names, n.ucm.rows, keep = ix)

# The UCM module only contributes pre-wave conjoints.
jah.pre <- fill_columns(jah, pre.var.names, n.jah.rows)
jah.post <- fill_columns(jah, post.var.names, n.jah.rows)
ucm.pre <- fill_columns(ucm, pre.var.names, n.ucm.rows, keep = ix)

# Bind conjoint and common columns into a data frame and add the id / wave /
# module markers. Post-wave columns deliberately receive the PRE-wave names so
# the three frames can be row-bound.
as_module_frame <- function(conjoint, common, pre.flag, module, ids = NULL) {
  out <- as.data.frame(cbind(conjoint, common))
  names(out) <- c(pre.var.names, common.var.names)
  out$id <- if (is.null(ids)) 1:nrow(out) else ids
  out$pre <- pre.flag
  out$mod <- module
  out
}

ucm.pre <- as_module_frame(ucm.pre, ucm.common, 1, 'ucm', ids = ix)
jah.pre <- as_module_frame(jah.pre, jah.common, 1, 'jah')
jah.post <- as_module_frame(jah.post, jah.common, 0, 'jah')

cces_data <- rbind(jah.pre, jah.post, ucm.pre)
save(cces_data, file = 'data/cces_data_matched.Rdata')


# stack longways
# Reshape from one row per respondent-wave to one row per conjoint task.
# A column belongs to a task when its name contains that task's code.
# NOTE(review): the match is unanchored, so any shared covariate whose name
# happens to contain e.g. '171' would be treated as task-specific; confirm
# none do.
task.cols <- lapply(
  c('171', '172', '173', '174'),
  function(code) grep(names(cces_data), pattern = code)
)

# Columns that are not specific to any task, split around position 4 so the
# task columns can be slotted in after the first four.
shared <- cces_data[, -unlist(task.cols)]
lead.cols <- shared[, c(1:4)]
tail.cols <- shared[, c(5:ncol(shared))]

task.frames <- lapply(
  task.cols,
  function(cols) cbind(lead.cols, cces_data[, cols], tail.cols)
)

# Every task block takes the first task's column names so the blocks can be
# row-bound; the task code is therefore dropped from the stacked names.
task.frames <- lapply(task.frames, setNames, names(task.frames[[1]]))

cces_stacked <- do.call(rbind, task.frames)
cces_stacked$indx <- paste(cces_stacked$mod, cces_stacked$id, sep = '_')

save(cces_stacked, file = 'data/cces_stacked_matched.Rdata')


########################################################
# check unmatched to matched data
########################################################

##ucm1=read.spss('~/Dropbox/IdeologyPrimaryConjoints/ucm/matched/UCM_UMB_matched_merge.sav')
#jah1=read.spss('~/Dropbox/IdeologyPrimaryConjoints/yale/matched/CCES16_JAH_OUTPUT_Feb2017.sav')

##ucm2=read.spss('~/Dropbox/IdeologyPrimaryConjoints/ucm/unmatched/CCES16_UCM_OUTPUT_Feb2017_unmatched.sav')
#jah2=read.spss('~/Dropbox/IdeologyPrimaryConjoints/yale/unmatched/CCES16_JAH_OUTPUT_Feb2017_unmatched.sav')

#mean(names(jah1)==names(jah2))==1
#TRUE

########################################################
# unmatched data
########################################################

rm(list=ls())
library(foreign)

# ---- Unmatched build: read the two raw modules and add respondent-level flags ----
setwd("~/Dropbox/IdeologyPrimaryConjoints/merged/replication0/")
ucm <- read.spss('~/Dropbox/IdeologyPrimaryConjoints/ucm/unmatched/CCES16_UCM_OUTPUT_Feb2017_unmatched.sav')
jah <- read.spss('~/Dropbox/IdeologyPrimaryConjoints/yale/unmatched/CCES16_JAH_OUTPUT_Feb2017_unmatched.sav')
# Expected to provide screeenJAH and screeenUCM, which are used below.
load('~/Dropbox/IdeologyPrimaryConjoints/merged/replication0/data/screeners_unmatched_JAH_UCM.Rdata')

# For each respondent, the share of the module's variables whose value
# contains the text 'Skipped'. The accumulator starts as a 1-d array of zeros
# with one cell per respondent.
skip_share <- function(dat) {
  n.skipped <- Reduce(
    function(total, v) total + as.numeric(grepl(v, pattern = 'Skipped')),
    dat,
    array(0, length(dat[[1]]))
  )
  n.skipped / length(dat)
}

# Both shares are computed before any column is appended, so the denominator
# is the number of variables in the raw file.
jah_skip <- skip_share(jah)
ucm_skip <- skip_share(ucm)
jah$skipped <- jah_skip
ucm$skipped <- ucm_skip

# Attach the two screener columns to each module.
jah$screen1 <- screeenJAH[, 1]
jah$screen2 <- screeenJAH[, 2]
ucm$screen1 <- screeenUCM[, 1]
ucm$screen2 <- screeenUCM[, 2]

# Variable names after the flag columns have been appended.
jah_nms <- names(jah)
ucm_nms <- names(ucm)

# ix: positions of UCM respondents with a non-missing JAHconjoints value;
# only these rows of the UCM module are carried forward.
ix <- which(!is.na(ucm$JAHconjoints))
jah.id <- 1:length(jah[[1]])
ucm.id <- 1:length(ix)

# Names of the pre-wave conjoint variables (tasks 171-174), generated rather
# than listed by hand. Order:
#   1. party ID and conjoint bookkeeping variables
#   2. responses _a.._d, each across the four tasks
#   3. candidate attributes: candidate A/B within attribute within task
#   4. issue positions: issue 1/2 within candidate A/B within task
pre.var.names <- local({
  tasks <- c("171", "172", "173", "174")

  # expand.grid varies its first argument fastest, which gives the nesting
  # described above.
  attrs <- expand.grid(
    cand = c("A", "B"),
    trait = c("gender", "race", "religion", "occupation",
              "personality", "endorsements", "record"),
    task = tasks,
    stringsAsFactors = FALSE
  )
  issues <- expand.grid(
    issue = 1:2,
    cand = c("A", "B"),
    task = tasks,
    stringsAsFactors = FALSE
  )

  c(
    'pid3', 'pid7', 'JAHconjoints', 'JAHcq',
    paste0("JAH", rep(tasks, times = 4), "_", rep(c("a", "b", "c", "d"), each = 4)),
    # attributes
    paste0("JAH", attrs$trait, attrs$cand, "_", attrs$task),
    paste0("JAHissue", issues$issue, issues$cand, "_", issues$task)
  )
})

# Names of the post-wave conjoint variables (tasks 371-374), generated rather
# than listed by hand. Order:
#   1. party ID and post-wave conjoint bookkeeping variables
#   2. responses _a.._d, each across the four tasks
#   3. candidate attributes: candidate A/B within attribute within task
#   4. issue positions: issue 1/2 within candidate A/B within task
post.var.names <- local({
  tasks <- c("371", "372", "373", "374")

  # expand.grid varies its first argument fastest, which gives the nesting
  # described above.
  attrs <- expand.grid(
    cand = c("A", "B"),
    trait = c("gender", "race", "religion", "occupation",
              "personality", "endorsements", "record"),
    task = tasks,
    stringsAsFactors = FALSE
  )
  issues <- expand.grid(
    issue = 1:2,
    cand = c("A", "B"),
    task = tasks,
    stringsAsFactors = FALSE
  )

  c(
    'pid3', 'pid7', "JAHconjointsPost", "JAHcq_post",
    paste0("JAH", rep(tasks, times = 4), "_", rep(c("a", "b", "c", "d"), each = 4)),
    paste0("JAH", attrs$trait, attrs$cand, "_", attrs$task),
    paste0("JAHissue", issues$issue, issues$cand, "_", issues$task)
  )
})

# Covariates carried along with every conjoint record, picked by column
# position in the JAH module (after skipped/screen1/screen2 were appended).
# NOTE(review): positional selection breaks silently if the column layout of
# the .sav file changes; the same names are also looked up in the UCM module.
common.var.names <- names(jah)[c(137:488, 609:803)]

library(stringr)

# NOTE(review): the JAH matrices get 2 * n rows but are filled with n-length
# columns, so R recycles each column: every JAH respondent appears twice
# (rows i and n + i) and the two copies receive different ids below. Confirm
# this is intended before relying on JAH row counts.
n.jah.rows <- length(jah[[1]]) * 2
n.ucm.rows <- length(ix) * 1

# Character matrix with one column per name in `vars`, each column being the
# whitespace-trimmed values of that variable in `src`. When `keep` is given,
# only those positions of the variable are used.
fill_columns <- function(src, vars, n.rows, keep = NULL) {
  out <- matrix(NA, n.rows, length(vars))
  for (k in seq_along(vars)) {
    vals <- src[[which(names(src) == vars[k])]]
    if (!is.null(keep)) {
      vals <- vals[keep]
    }
    out[, k] <- as.character(str_trim(vals))
  }
  out
}

jah.common <- fill_columns(jah, common.var.names, n.jah.rows)
ucm.common <- fill_columns(ucm, common.var.names, n.ucm.rows, keep = ix)

# The UCM module only contributes pre-wave conjoints.
jah.pre <- fill_columns(jah, pre.var.names, n.jah.rows)
jah.post <- fill_columns(jah, post.var.names, n.jah.rows)
ucm.pre <- fill_columns(ucm, pre.var.names, n.ucm.rows, keep = ix)

# Bind conjoint and common columns into a data frame and add the id / wave /
# module markers. Post-wave columns deliberately receive the PRE-wave names so
# the three frames can be row-bound.
as_module_frame <- function(conjoint, common, pre.flag, module, ids = NULL) {
  out <- as.data.frame(cbind(conjoint, common))
  names(out) <- c(pre.var.names, common.var.names)
  out$id <- if (is.null(ids)) 1:nrow(out) else ids
  out$pre <- pre.flag
  out$mod <- module
  out
}

ucm.pre <- as_module_frame(ucm.pre, ucm.common, 1, 'ucm', ids = ix)
jah.pre <- as_module_frame(jah.pre, jah.common, 1, 'jah')
jah.post <- as_module_frame(jah.post, jah.common, 0, 'jah')

cces_data <- rbind(jah.pre, jah.post, ucm.pre)
save(cces_data, file = 'data/cces_data_unmatched.Rdata')


# stack longways
# Reshape from one row per respondent-wave to one row per conjoint task.
# A column belongs to a task when its name contains that task's code.
# NOTE(review): the match is unanchored, so any shared covariate whose name
# happens to contain e.g. '171' would be treated as task-specific; confirm
# none do.
task.cols <- lapply(
  c('171', '172', '173', '174'),
  function(code) grep(names(cces_data), pattern = code)
)

# Columns that are not specific to any task, split around position 4 so the
# task columns can be slotted in after the first four.
shared <- cces_data[, -unlist(task.cols)]
lead.cols <- shared[, c(1:4)]
tail.cols <- shared[, c(5:ncol(shared))]

task.frames <- lapply(
  task.cols,
  function(cols) cbind(lead.cols, cces_data[, cols], tail.cols)
)

# Every task block takes the first task's column names so the blocks can be
# row-bound; the task code is therefore dropped from the stacked names.
task.frames <- lapply(task.frames, setNames, names(task.frames[[1]]))

cces_stacked <- do.call(rbind, task.frames)
cces_stacked$indx <- paste(cces_stacked$mod, cces_stacked$id, sep = '_')

save(cces_stacked, file = 'data/cces_stacked_unmatched.Rdata')

#END merge step 1

rm(list=ls())
library(ggplot2)
library(stringr)

# Reorder the rows of an estimate table within label groups.
#
# Within every group that has more than two rows, the rows whose label is not
# listed in `omits` are sorted by ascending value of column `o.column`. Rows
# listed in `omits` (typically the omitted/baseline level) and all rows of
# groups with one or two members keep their original position.
#
# Arguments:
#   xmat         matrix or data frame of estimates, one row per label
#   labels       label of each row of xmat
#   label.groups group of each row of xmat
#   omits        labels to hold in place
#   o.column     column of xmat to sort on (index or name)
# Returns: xmat with its rows rearranged.
lableOrder <- function(xmat, labels, label.groups, omits, o.column) {

  # Rows to hold in place are identified by exact membership in `omits`.
  # (Earlier versions tagged them with an "omit_" prefix and then searched for
  # the substring "omit", which also froze any label merely containing it.)
  held <- labels %in% omits[!is.na(omits)]

  # values to sort on
  sort.key <- xmat[, o.column]

  # row order, starting from the identity
  row.order <- seq_along(sort.key)

  groups <- unique(label.groups)
  for (g in seq_along(groups)) {
    members <- which(label.groups == groups[g])
    # the size test deliberately counts held rows too
    if (length(members) > 2) {
      movable <- members[!held[members]]
      row.order[movable] <- movable[order(sort.key[movable])]
    }
  }

  xmat[row.order, ]
}

# Return the rows of `x` in the order given by `o`.
#
# Arguments:
#   x  data frame with a column `iv_order`
#   o  vector of iv_order values, in the desired row order
# Returns: the rows of x whose iv_order equals o[1], o[2], ... in that order
#   (first match when iv_order has duplicates). An empty `o` gives zero rows.
# Errors: stops with the offending values when an element of `o` does not
#   occur in x$iv_order. The previous loop failed here with "replacement has
#   length zero", and padded the result with all-NA rows whenever `o` was
#   shorter than nrow(x).
reOrder <- function(x, o) {
  row.idx <- match(o, x$iv_order)
  if (anyNA(row.idx)) {
    stop(
      "reOrder: value(s) not found in x$iv_order: ",
      paste(o[is.na(row.idx)], collapse = ", "),
      call. = FALSE
    )
  }
  x[row.idx, ]
}

# Project root. The trailing slash matters: the data path below is built by
# plain string concatenation.
dirs <- "~/Dropbox/IdeologyPrimaryConjoints/merged/replication0/"
setwd(dirs)

# Stacked, unmatched data; the code below works on the object cces_stacked.
load(paste0(dirs, "data/cces_stacked_unmatched.Rdata"))

###########################################
###First, need to stack based on candidates, not just candidate pairs (and also get text out for labels later)

#This has leaners as independents, which is incorrect
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)

# conditional to build the candidate matrix, only needs to be produced once
#build.candidate_matrix=T
#if(build.candidate_matrix==T){

#Use this for pid3 w/ leaners with party and everyone else as independent (which matches the randomization)
#cces_stacked$pid3clean <- recode(cces_stacked$pid7,"'Independent'=0;'Not sure'=0;'Lean Democrat'=-1;'Not very strong Democrat'=-1;'Strong Democrat'=-1;'Lean Republican'=1;'Not very strong Republican'=1;'Strong Republican'=1;else=NA",as.factor.result=F)
# Three-point party ID from pid7: leaners and partisans are coded -1 (Democrat)
# or 1 (Republican), 'Independent' and 'Not sure' are 0, anything else is NA.
cces_stacked$pid3clean <- as.numeric(as.character(
  recode(cces_stacked$pid7,"'Independent'=0;'Not sure'=0;'Lean Democrat'=-1;'Not very strong Democrat'=-1;'Strong Democrat'=-1;'Lean Republican'=1;'Not very strong Republican'=1;'Strong Republican'=1;else=NA")
))

# Treat 'Not Asked' and 'Skipped' as missing.
drop_unasked <- function(answers) {
  answers[which(answers == 'Not Asked')] <- NA
  answers[which(answers == 'Skipped')] <- NA
  answers
}

# Strength-of-partisanship follow-ups: reword 'so strong' as 'very strong'.
cces_stacked$CC16_421_dem <- gsub(
  drop_unasked(cces_stacked$CC16_421_dem),
  pattern = 'so strong', replacement = 'very strong'
)
cces_stacked$CC16_421_rep <- gsub(
  drop_unasked(cces_stacked$CC16_421_rep),
  pattern = 'so strong', replacement = 'very strong'
)

# Leaner follow-up: reword the answers as 'Independent' / 'Lean ...'.
leaner <- drop_unasked(cces_stacked$CC16_421b)
leaner <- gsub(leaner, pattern = 'Neither', replacement = 'Independent')
leaner <- gsub(leaner, pattern = 'The Democratic Party', replacement = 'Lean Democrat')
leaner <- gsub(leaner, pattern = 'The Republican Party', replacement = 'Lean Republican')
cces_stacked$CC16_421b <- leaner

# Copy the non-missing entries of `newer` over `base`.
overlay <- function(base, newer) {
  answered <- which(!is.na(newer))
  base[answered] <- newer[answered]
  base
}

# pid7post: one column assembled from the three items above. Later items
# overwrite earlier ones where both are non-missing (dem, then rep, then the
# leaner item).
cces_stacked$pid7post <- NA
cces_stacked$pid7post <- overlay(cces_stacked$pid7post, cces_stacked$CC16_421_dem)
cces_stacked$pid7post <- overlay(cces_stacked$pid7post, cces_stacked$CC16_421_rep)
cces_stacked$pid7post <- overlay(cces_stacked$pid7post, cces_stacked$CC16_421b)

# Making sure not to combine ucm and jah respondents: shift the ids of rows
# whose indx contains 'ucm' by 10000.
ucm.rows <- grep(cces_stacked$indx, pattern = 'ucm')
cces_stacked$id[ucm.rows] <- cces_stacked$id[ucm.rows] + 10000

# The columns of cces_stacked are referenced by bare name from here on.
attach(cces_stacked)

# Building a matrix (not just pure stacking, recoding some factors, etc. inside)
n.rows <- nrow(cces_stacked)

# 1 when the answer equals `label`, 0 otherwise.
chose_candidate <- function(answer, label) {
  ifelse(answer == label, 1, 0)
}

# Four-point favorability mapped onto 0..1; any other answer is NA.
favor_score <- function(rating) {
  ifelse(rating == "Very favorable", 1,
    ifelse(rating == "Somewhat favorable", 0.666,
      ifelse(rating == "Somewhat unfavorable", 0.333,
        ifelse(rating == "Very unfavorable", 0, NA))))
}

# Candidate A's values followed by candidate B's, as character.
stack_ab <- function(for.a, for.b) {
  c(as.character(for.a), as.character(for.b))
}

# One row per candidate profile: all candidate-A rows first, then all
# candidate-B rows, each block in the row order of cces_stacked.
candidate_matrix <- data.frame(
  candidate = rep(c("A", "B"), each = n.rows),
  respondent = c(id, id),
  pid3 = c(pid3clean, pid3clean),
  #pid7=c(pid7,pid7),
  conjoints = c(JAHconjoints, JAHconjoints),
  # item _a is scored per candidate as "was this candidate picked"; item _b likewise
  dv_choice = c(chose_candidate(JAH171_a, "Candidate A"), chose_candidate(JAH171_a, "Candidate B")),
  dv_similar = c(chose_candidate(JAH171_b, "Candidate A"), chose_candidate(JAH171_b, "Candidate B")),
  # item _c feeds candidate A's favorability, item _d candidate B's
  dv_favor = c(favor_score(JAH171_c), favor_score(JAH171_d)),
  gender = stack_ab(JAHgenderA_171, JAHgenderB_171),
  race = stack_ab(JAHraceA_171, JAHraceB_171),
  religion = stack_ab(JAHreligionA_171, JAHreligionB_171),
  occupation = stack_ab(JAHoccupationA_171, JAHoccupationB_171),
  personality = stack_ab(JAHpersonalityA_171, JAHpersonalityB_171),
  endorsements = stack_ab(JAHendorsementsA_171, JAHendorsementsB_171),
  record = stack_ab(JAHrecordA_171, JAHrecordB_171),
  issue1 = stack_ab(JAHissue1A_171, JAHissue1B_171),
  issue2 = stack_ab(JAHissue2A_171, JAHissue2B_171)
)

voter_matrix=data.frame(
	age 			= c(2016-as.numeric(as.character(cces_stacked$birthyr)),2016-as.numeric(as.character(cces_stacked$birthyr))),
	gender 			= c(as.character(gender),as.character(gender)),
	race   			= c(as.character(race),as.character(race)),
	educ   			= c(as.character(educ),as.character(educ)),
	edloan 	  		= c(as.character(edloan),as.character(edloan)),
	married 		= c(as.character(marstat),as.character(marstat)),
	reg     		= c(as.character(votereg),as.character(votereg)),
	vote12    		= c(as.character(CC16_316),as.character(CC16_316)),
	vote12_obama 	= c(as.character(CC16_326),as.character(CC16_326)),
	primary			= c(as.numeric(grepl(cces_stacked$CC16_327,pattern='Yes')),as.numeric(grepl(cces_stacked$CC16_327,pattern='Yes'))),
	state     		= c(as.character(inputstate),as.character(inputstate)),
	employ   		= c(as.character(employ),as.character(employ)),
	job   	   		= c(as.character(industryclass),as.character(industryclass)),
	religion 		= c(as.character(religpew),as.character(religpew)),
	born_again 		= c(as.character(pew_bornagain),as.character(pew_bornagain)),
	milstat_1  		= c(as.character(milstat_1),as.character(milstat_1)),  	   	 	#         Military Household - None
	milstat_2  		= c(as.character(milstat_2),as.character(milstat_2)),  	   	 	#         Military Household - None
	milstat_3  		= c(as.character(milstat_3),as.character(milstat_3)),  	   	 	#         Military Household - None
	milstat_4  		= c(as.character(milstat_4),as.character(milstat_4)),  	   	 	#         Military Household - None
	milstat_5  		= c(as.character(milstat_5),as.character(milstat_5)),  	   	 	#         Military Household - None
	unionhh  		= c(as.character(unionhh),as.character(unionhh)),
	faminc    		= c(as.character(faminc),as.character(faminc)), 				#         Family income
	investor  		= c(as.character(investor),as.character(investor)), 			#         Stock ownership
	sexuality 		= c(as.character(sexuality),as.character(sexuality)), 			#         Sexual Orientation
	trans     		= c(as.character(trans),as.character(trans)),  					#		  Transgender

	healthins_1 	= c(as.character(healthins_1),as.character(healthins_1)),     	#         Health insurance - Yes, through my job or a family member's employer
	healthins_2 	= c(as.character(healthins_2),as.character(healthins_2)),    	#         Health insurance - Yes, through a government program, such as Medicare or Medicaid
	healthins_3 	= c(as.character(healthins_3),as.character(healthins_3)),    	#         Health insurance - Yes, through my school
	healthins_4 	= c(as.character(healthins_4),as.character(healthins_4)),    	#         Health insurance - Yes, I purchased my own
	healthins_5 	= c(as.character(healthins_5),as.character(healthins_5)),    	#         Health insurance - Not sure
	healthins_6 	= c(as.character(healthins_6),as.character(healthins_6)),    	#         Health insurance - No
	healthins2 	 	= c(as.character(healthins2),as.character(healthins2)),    		#         Purchase health insurance thru exchange

	newsint				= c(as.character(newsint),  as.character(newsint)),
	media_blog			= c(as.character(CC16_300_1),as.character(CC16_300_1)),       	#        Media Use - Blog
	media_tv    		= c(as.character(CC16_300_2),as.character(CC16_300_2)),       	#        Media Use - TV
	media_news			= c(as.character(CC16_300_3),as.character(CC16_300_3)),       	#        Media Use - Newspaper
	media_radio			= c(as.character(CC16_300_4),as.character(CC16_300_4)),       	#        Media Use - Radio
	media_social		= c(as.character(CC16_300_5),as.character(CC16_300_5)),       	#        Media Use - Social Media
	media_none			= c(as.character(CC16_300_6),as.character(CC16_300_6)),       	#        Media Use - None
	media_natlnews		= c(as.character(CC16_300b),as.character(CC16_300b)),        	#        Watch local news, national news or both
	media_printnews		= c(as.character(CC16_300c),as.character(CC16_300c)),        	#        Read print news, online news or both
	socmedia_polstory1	= c(as.character(CC16_300d_1),as.character(CC16_300d_1)),      #        Social media - Posted a story, photo, video or link about politics
	socmedia_polcomment	= c(as.character(CC16_300d_2),as.character(CC16_300d_2)),      #        Social media - Posted a comment about politics
	socmedia_polstory2	= c(as.character(CC16_300d_3),as.character(CC16_300d_3)),      #        Social media - Read a story or watched a video about politics
	socmedia_polfollow	= c(as.character(CC16_300d_4),as.character(CC16_300d_4)),      #        Social media - Followed a political event
	socmedia_polstory3	= c(as.character(CC16_300d_5),as.character(CC16_300d_5)),      #        Social media - Forwarded a story, photo, video or link about politics to friends

	mip_guns 			= c(as.character(CC16_301a),as.character(CC16_301a)), #               Most Important Problem - Gun control
	mip_abortion 		= c(as.character(CC16_301b),as.character(CC16_301b)), #               Most Important Problem - Abortion
	mip_taxes 			= c(as.character(CC16_301c),as.character(CC16_301c)), #               Most Important Problem - Taxes
	mip_immigration 	= c(as.character(CC16_301d),as.character(CC16_301d)), #               Most Important Problem - Immigration
	mip_deficit 		= c(as.character(CC16_301e),as.character(CC16_301e)), #               Most Important Problem - Budget deficit
	mip_defense 		= c(as.character(CC16_301f),as.character(CC16_301f)), #               Most Important Problem - Defense spending
	mip_socsec 			= c(as.character(CC16_301g),as.character(CC16_301g)), #               Most Important Problem - Social security
	mip_env 			= c(as.character(CC16_301h),as.character(CC16_301h)), #               Most Important Problem - Environment
	mip_jobs 			= c(as.character(CC16_301i),as.character(CC16_301i)), #               Most Important Problem - Jobs
	mip_crime 			= c(as.character(CC16_301j),as.character(CC16_301j)), #               Most Important Problem - Crime
	mip_natlsecurity 	= c(as.character(CC16_301k),as.character(CC16_301k)), #               Most Important Problem - National security
	mip_race 			= c(as.character(CC16_301l),as.character(CC16_301l)), #               Most Important Problem - Race relations
	mip_healthcare 		= c(as.character(CC16_301m),as.character(CC16_301m)), #               Most Important Problem - Health care
	mip_gays 			= c(as.character(CC16_301n),as.character(CC16_301n)), #               Most Important Problem - Gay marriage
	mip_govtcorruption 	= c(as.character(CC16_301o),as.character(CC16_301o)), #               Most Important Problem - Government corruption
	mip_religion 		= c(as.character(pew_religimp),as.character(pew_religimp)),  # PEW importance of religion

	us_house_majority	= c(as.character(CC16_321a),as.character(CC16_321a)),
	us_senate_majority	= c(as.character(CC16_321b),as.character(CC16_321b)),
	#st_house_majority	= c(as.character(CC16_321c),as.character(CC16_321c)),
	#st_senate_majority	= c(as.character(CC16_321d),as.character(CC16_321d)),
	know_gov			= c(as.character(CC16_322a),as.character(CC16_322a)),
	know_senate1		= c(as.character(CC16_322b),as.character(CC16_322b)),
	know_senate2		= c(as.character(CC16_322c),as.character(CC16_322c)),
	know_house			= c(as.character(CC16_322d),as.character(CC16_322d)),
	party_gov			= c(as.character(CurrentGovParty),as.character(CurrentGovParty)),
	party_senate1		= c(as.character(CurrentSen1Party),as.character(CurrentSen1Party)),
	party_senate2		= c(as.character(CurrentSen2Party),as.character(CurrentSen2Party)),
	party_house			= c(as.character(CurrentHouseParty),as.character(CurrentHouseParty)),
	dem_place			= c(as.character(CC16_340g),as.character(CC16_340g)),
	rep_place			= c(as.character(CC16_340h),as.character(CC16_340h)),
	self_place			= c(as.character(CC16_340a),as.character(CC16_340a)),
self_place_attempt			= c(as.character(CC16_340a),as.character(CC16_340a)),
# groups * positions :: rough L to R score for each dimension of interest

# y, y :: - trade (jobs)		| protection 			| business 					| tea party | labor
# y, y :: - lgbt 				| traditional marriage  | christian 				| tea party | labor
# y, y :: - govt assist 		| assist abuse 			| civil rights | business	| tea party | labor
# y, y :: - immigrant citizen 	| border security 		| business 					| tea party | labor
# y, y :: - prochoice 			| prolife  				| reproductive | christian 	| tea party | labor
# y, y :: - taxes hikes 		| tax cuts 				| taxes | business 			| tea party | labor
# y, y :: - co2 env 			| oil energy  			| env | energy              | tea party | labor
# y, y :: - policing/race 		| crime 				| civil rights              | tea party | labor
# y, y :: - defense  			| strong military 		| vets                      | tea party | labor
# y, y :: - gun control  		| gun rights 			| guns control | gun rights | tea party | labor

	guns_policy1 =  c(as.character(CC16_330a),as.character(CC16_330a)),               # Gun Control - Background checks for all sales, including at gun shows and over the Internet
	guns_policy2 =  c(as.character(CC16_330b),as.character(CC16_330b)),               # Gun Control - Prohibit state and local governments from publishing the names and addresses of all gun owners
	guns_policy3 =  c(as.character(CC16_330d),as.character(CC16_330d)),               # Gun Control - Ban assault rifles
	guns_policy4 =  c(as.character(CC16_330e),as.character(CC16_330e)),               # Gun Control - Make it easier for people to obtain concealed-carry permit

	immi_policy1 =  c(as.character(CC16_331_1),as.character(CC16_331_1)),              # Immigration - Grant legal status to all illegal immigrants who have held jobs and paid taxes for at least 3 years, and not been convicted of any felony crimes
	immi_policy2 =  c(as.character(CC16_331_2),as.character(CC16_331_2)),              # Immigration - Increase the number of border patrols on the U.S.-Mexican border
	immi_policy3 =  c(as.character(CC16_331_3),as.character(CC16_331_3)),              # Immigration - Grant legal status to people who were brought to the US illegally as children, but who have graduated from a U.S. high school
	immi_policy4 =  c(as.character(CC16_331_4),as.character(CC16_331_4)),              # Immigration - Fine U.S. businesses that hire illegal immigrants
	immi_policy5 =  c(as.character(CC16_331_5),as.character(CC16_331_5)),              # Immigration - Admit no refugees from Syria
	immi_policy6 =  c(as.character(CC16_331_6),as.character(CC16_331_6)),              # Immigration - Increase the number of visas for overseas workers to work in the U.S.
	immi_policy7 =  c(as.character(CC16_331_7),as.character(CC16_331_7)),              # Immigration - Identify and deport illegal immigrants
	immi_policy8 =  c(as.character(CC16_331_8),as.character(CC16_331_8)),              # Immigration - Ban Muslims from immigrating to the U.S.
	immi_policy9 =  c(as.character(CC16_331_9),as.character(CC16_331_9)),              # Immigration - None of these

	abrt_policy1 =  c(as.character(CC16_332a),as.character(CC16_332a)),               # Abortion Policies - Always allow a woman to obtain an abortion as a matter of choice
	abrt_policy2 =  c(as.character(CC16_332b),as.character(CC16_332b)),               # Abortion Policies - Permit abortion only in case of rape, incest or when the woman's life is in danger
	abrt_policy3 =  c(as.character(CC16_332c),as.character(CC16_332c)),               # Abortion Policies - Prohibit all abortions after the 20th week of pregnancy
	abrt_policy4 =  c(as.character(CC16_332d),as.character(CC16_332d)),               # Abortion Policies - Allow employers to decline coverage of abortions in insurance plans
	abrt_policy5 =  c(as.character(CC16_332e),as.character(CC16_332e)),               # Abortion Policies - Prohibit the expenditure of funds authorized or appropriated by federal law for any abortion
	abrt_policy6 =  c(as.character(CC16_332f),as.character(CC16_332f)),               # Abortion Policies - Make abortions illegal in all circumstances

	envs_policy1 =  c(as.character(CC16_333a),as.character(CC16_333a)),               # Environment Policies - Give Environmental Protection Agency power to regulate Carbon Dioxide emissions
	envs_policy2 =  c(as.character(CC16_333b),as.character(CC16_333b)),               # Environment Policies - Raise required fuel efficiency for the average automobile from 25 mpg to 35 mpg
	envs_policy3 =  c(as.character(CC16_333c),as.character(CC16_333c)),               # Environment Policies - Require a minimum amount of renewable fuels (wind, solar, and hydroelectric) in the generation of electricity even if electricity prices increase somewhat
	envs_policy4 =  c(as.character(CC16_333d),as.character(CC16_333d)),               # Environment Policies - Strengthen enforcement of the lean Air Act and Clean Water Act even if it costs US jobs

	crme_policy1 =  c(as.character(CC16_334a),as.character(CC16_334a)),               # Crime Policies - Eliminate mandatory minimum sentences for non-violent drug offenders
	crme_policy2 =  c(as.character(CC16_334b),as.character(CC16_334b)),               # Crime Policies - Require police officers to wear body cameras that record all of their activities while on duty
	crme_policy3 =  c(as.character(CC16_334c),as.character(CC16_334c)),               # Crime Policies - Increase the number of police on the street by 10 percent, even if it means fewer funds for other public services
	crme_policy4 =  c(as.character(CC16_334d),as.character(CC16_334d)),               # Crime Policies - Increase prison sentences for felons who have already committed two or more serious or violent crimes

	gays_policy1 =  c(as.character(CC16_335),as.character(CC16_335)),                # Gay Marriage

	budg_policy1 =  c(as.character(CC16_337_1),as.character(CC16_337_1)),              # Budget Priorities - Cut Defense Spending
	budg_policy2 =  c(as.character(CC16_337_2),as.character(CC16_337_2)),              # Budget Priorities - Cut Domestic Spending
	budg_policy3 =  c(as.character(CC16_337_3),as.character(CC16_337_3)),              # Budget Priorities - Raise Taxes

	roll_garland =  c(as.character(CC16_351A),as.character(CC16_351A)),               # For or Against - Congress - Approve Garland nomination  			 		## TeaParty/Conservative

	roll_tpp_act =  c(as.character(CC16_351B),as.character(CC16_351B)),               # For or Against - Congress - Trans-Pacific Partnership Act  		 		## Trade
	roll_trd_adj =  c(as.character(CC16_351D),as.character(CC16_351D)),               # For or Against - Congress - Trade Adjustment Assistence Act 		 		## Trade

	roll_usa_fre =  c(as.character(CC16_351C),as.character(CC16_351C)),               # For or Against - Congress - USA Freedom Act                		 		## Terrorism/Privacy
	roll_iransct =  c(as.character(CC16_351G),as.character(CC16_351G)),               # For or Against - Congress - Iran Sanctions Act

	roll_educrfr =  c(as.character(CC16_351E),as.character(CC16_351E)),               # For or Against - Congress - Education Reform                  	 		## Education
	roll_infrast =  c(as.character(CC16_351F),as.character(CC16_351F)),               # For or Against - Congress - Highway and Transportation Funding Act 		## Infrastructure/Spending

	roll_medicar =  c(as.character(CC16_351H),as.character(CC16_351H)),               # For or Against - Congress - Medicare Accountability and Cost Reform Act  	## Medicare/Health Care
	roll_rpl_aca =  c(as.character(CC16_351I),as.character(CC16_351I)),               # For or Against - Congress - Repeal Affordable Care Act 					## Health care
	roll_minwage =  c(as.character(CC16_351K ),as.character(CC16_351K)),              # For or Against - Congress - Minimum wage 									## Wage/jobs
# more covars here
  valid_primary = c(as.numeric(!is.na(CL_E2016PPVM)),as.numeric(!is.na(CL_E2016PPVM))),
  valid_general = c(as.numeric(!is.na(CL_E2016GVM)),as.numeric(!is.na(CL_E2016GVM))),
  #!is.na(cces_stacked$CL_E2016GVM)  # general
  #!is.na(cces_stacked$CL_E2016PPVM) # primary
	pre_pid7 = c(as.character(pid7),as.character(pid7)),
	post_pid7 = c(as.character(pid7post),as.character(pid7post)),
	pre=pre, # pre/post election
	screen1 = c(as.character(screen1),as.character(screen1)), # pre screen
	screen2 = c(as.character(screen2),as.character(screen2)), # post screen
	skipped = c(as.character(skipped),as.character(skipped)) # post screen
)

# Coerce every column of voter_matrix to character so the recodes that follow
# can match on the text labels.
voter_matrix[] <- lapply(voter_matrix, as.character)

# 0/1 indicators from exact matches on the gender label.
gender_lab <- voter_matrix$gender
voter_matrix$male   <- as.numeric(gender_lab == "Male")
voter_matrix$female <- as.numeric(gender_lab == "Female")

# 0/1 indicators from exact matches on the race label.
race_lab <- voter_matrix$race
voter_matrix$white <- as.numeric(race_lab == "White")
voter_matrix$black <- as.numeric(race_lab == "Black")
voter_matrix$hisps <- as.numeric(race_lab == "Hispanic")

# Recode the education label to a 0-5 score. Patterns are applied one at a
# time, in this order, against the partially recoded column; labels that match
# no pattern become NA in the final as.numeric().
educ_codes <- c(
  "No HS"        = 0,
  "High school"  = 1,
  "Some college" = 2,
  "2-"           = 3,
  "4-"           = 4,
  "Post-"        = 5
)
for (educ_pattern in names(educ_codes)) {
  voter_matrix$educ[grep(educ_pattern, voter_matrix$educ)] <- educ_codes[[educ_pattern]]
}
voter_matrix$educ <- as.numeric(voter_matrix$educ)

# 1 when the education-loan answer is exactly "Yes".
voter_matrix$edloan <- as.numeric(voter_matrix$edloan == "Yes")

# Marital-status indicators. The raw label is captured first because the
# `married` column itself is overwritten by the last indicator.
marital_lab <- voter_matrix$married
voter_matrix$single <- as.numeric(
  marital_lab == "Widowed" | marital_lab == "Single" | marital_lab == "Skipped"
)
voter_matrix$divorced <- as.numeric(
  marital_lab == "Divorced" | marital_lab == "Separated"
)
voter_matrix$married <- as.numeric(
  marital_lab == "Married" | marital_lab == "Domestic partnership"
)

# Registration / turnout indicators.
voter_matrix$reg          <- as.numeric(voter_matrix$reg == "Yes")
voter_matrix$vote12       <- as.numeric(grepl("Yes", voter_matrix$vote12))
voter_matrix$vote12_obama <- as.numeric(grepl("Obama", voter_matrix$vote12_obama))
voter_matrix$primary      <- as.numeric(voter_matrix$primary)

# Self-assignment kept from the original script (leaves `state` as it is when
# that column already exists).
voter_matrix$state <- voter_matrix$state

# Employment indicators; the raw label is read once because `employ` is
# replaced by the full-time indicator at the end.
employ_lab <- voter_matrix$employ
voter_matrix$retired  <- as.numeric(employ_lab == "Retired")
voter_matrix$unemploy <- as.numeric(employ_lab == "Unemployed")
voter_matrix$employ   <- as.numeric(employ_lab == "Full-time")

# Occupation indicators: each job_* column is 1 when the respondent's industry
# label matches ANY of the listed patterns. Note that 'Public Administration'
# feeds both job_citycouncil and job_polstaffer.
job_lab <- voter_matrix$job
job_sectors <- list(
  job_attorney      = c('Professional, Scientific, and Technical Services'),
  job_teacher       = c('Education Services',
                        'Health Care and Social Assistance'),
  job_citycouncil   = c('Public Administration',
                        'Utilities',
                        'Waste Management and Remediation Services'),
  job_factory       = c('Transportation  and Warehousing',
                        'Mining',
                        'Manufacturing'),
  job_ceo           = c('Wholesale Trade',
                        'Retail Trade',
                        'Finance  and Insurance',
                        'Information',
                        'Management of Companies and Enterprises',
                        'Real Estate and Rental and Leasing'),
  job_farmer        = c('Agriculture',
                        'Fishing and Hunting',
                        'Forestry'),
  job_polstaffer    = c('Public Administration'),
  job_smallbusiness = c('Construction',
                        'Hotel Accommodation  and  Food Services')
)
for (job_col in names(job_sectors)) {
  # One logical vector per pattern, OR-ed together element-wise.
  sector_hits <- lapply(job_sectors[[job_col]], grepl, x = job_lab)
  voter_matrix[[job_col]] <- as.numeric(Reduce(`|`, sector_hits))
}

# 1 when milstat_5 is exactly 'No'.
voter_matrix$job_military <- as.numeric(voter_matrix$milstat_5 == 'No')

# Religion indicators built from the religion label and the born-again answer.
# Both raw vectors are captured up front; born_again is recoded to 0/1 only
# after the Protestant/Evangelical split has used its text values.
relig_lab     <- voter_matrix$religion
born_lab      <- voter_matrix$born_again
is_protestant <- grepl("Protestant", relig_lab)

voter_matrix$religion_none <- as.numeric(
  grepl("Nothing", relig_lab) |
    grepl("Agnostic", relig_lab) |
    grepl("Atheist", relig_lab) |
    grepl("Skipped", relig_lab)
)
voter_matrix$religion_catholic <- as.numeric(
  grepl("Eastern or Greek Orthodox", relig_lab) | grepl("Catholic", relig_lab)
)
# Pattern match (not equality) on born_again: any label containing "No"/"Yes".
voter_matrix$religion_protestant  <- as.numeric(is_protestant & grepl("No", born_lab))
voter_matrix$religion_evangelical <- as.numeric(is_protestant & grepl("Yes", born_lab))

voter_matrix$born_again <- as.numeric(born_lab == "Yes")
voter_matrix$unionhh    <- as.numeric(grepl("Yes", voter_matrix$unionhh))

# Recode the family-income label to a 0-15 score. The k-th pattern below
# (matched as a regex, in this order) is replaced by the code k - 1.
faminc_patterns <- c(
  "Less than",
  "[$]10,000", "[$]20,000", "[$]30,000", "[$]40,000", "[$]50,000",
  "[$]60,000", "[$]70,000", "[$]80,000", "[$]100,000", "[$]120,000",
  "[$]150,000", "[$]200,000", "[$]250,000", "[$]350,000", "[$]500,000"
)
for (faminc_step in seq_along(faminc_patterns)) {
  faminc_hits <- grep(faminc_patterns[faminc_step], voter_matrix$faminc)
  voter_matrix$faminc[faminc_hits] <- faminc_step - 1
}
# Explicit refusals are set to missing before the numeric conversion.
voter_matrix$faminc[grep("Prefer not to say", voter_matrix$faminc)] <- NA
voter_matrix$faminc <- as.numeric(voter_matrix$faminc)

# 1 when the investor answer is exactly "Yes".
voter_matrix$investor <- as.numeric(voter_matrix$investor == "Yes")
# 1 when the trans answer contains 'Yes' or the sexuality label contains
# Bisexual / Gay / Lesbian / Other; NA when the sexuality label contains
# 'Asked' or 'Skipped'.
sexuality_lab <- voter_matrix$sexuality
queer_flag <- as.numeric(
  grepl("Yes", voter_matrix$trans) |
    grepl("Bisexual", sexuality_lab) |
    grepl("Gay", sexuality_lab) |
    grepl("Lesbian", sexuality_lab) |
    grepl("Other", sexuality_lab)
)
queer_flag[grep("Asked", sexuality_lab)]   <- NA
queer_flag[grep("Skipped", sexuality_lab)] <- NA
voter_matrix$queer_identity <- queer_flag

# health_insured: 1 when any of healthins_1..healthins_4 contains 'Yes'.
ins1_yes <- grepl("Yes", voter_matrix$healthins_1)
ins2_yes <- grepl("Yes", voter_matrix$healthins_2)
ins3_yes <- grepl("Yes", voter_matrix$healthins_3)
ins4_yes <- grepl("Yes", voter_matrix$healthins_4)
voter_matrix$health_insured <- as.numeric(ins1_yes | ins2_yes | ins3_yes | ins4_yes)

# aca_insured: 1 when healthins_2 or healthins2 contains 'Yes'.
voter_matrix$aca_insured <- as.numeric(
  ins2_yes | grepl("Yes", voter_matrix$healthins2)
)

# media_interest: number of the five media items whose answer contains 'Yes'.
media_answers <- list(
  voter_matrix$media_blog,
  voter_matrix$media_tv,
  voter_matrix$media_news,
  voter_matrix$media_radio,
  voter_matrix$media_social
)
voter_matrix$media_interest <- Reduce(
  `+`,
  lapply(media_answers, function(answer) as.numeric(grepl("Yes", answer)))
)

# socialmedia_interest: same count over the five social-media items.
socmedia_answers <- list(
  voter_matrix$socmedia_polstory1,
  voter_matrix$socmedia_polstory2,
  voter_matrix$socmedia_polstory3,
  voter_matrix$socmedia_polcomment,
  voter_matrix$socmedia_polfollow
)
voter_matrix$socialmedia_interest <- Reduce(
  `+`,
  lapply(socmedia_answers, function(answer) as.numeric(grepl("Yes", answer)))
)

# Issue-importance items: each becomes 1 when its label contains 'High',
# 0 otherwise, and NA when the label contains 'Not Asked'.
mip_high_items <- c(
  "mip_guns", "mip_abortion", "mip_taxes", "mip_immigration", "mip_deficit",
  "mip_defense", "mip_socsec", "mip_env", "mip_jobs", "mip_crime",
  "mip_natlsecurity", "mip_race", "mip_healthcare", "mip_gays",
  "mip_govtcorruption"
)
for (mip_col in mip_high_items) {
  mip_lab  <- voter_matrix[[mip_col]]
  mip_flag <- as.numeric(grepl("High", mip_lab))
  mip_flag[grep("Not Asked", mip_lab)] <- NA
  voter_matrix[[mip_col]] <- mip_flag
}

# mip_religion uses different labels: 1 when the label contains 'Very' or
# 'Somewhat'; NA when it contains 'Not Asked'.
mip_religion_lab <- voter_matrix$mip_religion
na <- grep("Not Asked", mip_religion_lab)
voter_matrix$mip_religion <- as.numeric(
  grepl("Very", mip_religion_lab) | grepl("Somewhat", mip_religion_lab)
)
voter_matrix$mip_religion[na] <- NA

# Environment / crime / abortion / gay-marriage items: each column becomes 1
# when its label contains the pattern listed for it, 0 otherwise.
policy_patterns <- c(
  envs_policy1 = "Oppose",
  envs_policy2 = "Oppose",
  envs_policy3 = "Oppose",
  envs_policy4 = "Oppose",
  crme_policy1 = "Oppose",
  crme_policy2 = "Oppose",
  crme_policy3 = "Support",
  crme_policy4 = "Support",
  abrt_policy1 = "Oppose",
  abrt_policy2 = "Support",
  abrt_policy3 = "Support",
  abrt_policy4 = "Support",
  abrt_policy5 = "Support",
  abrt_policy6 = "Support",
  gays_policy1 = "Oppose"
)
for (policy_col in names(policy_patterns)) {
  voter_matrix[[policy_col]] <- as.numeric(
    grepl(policy_patterns[[policy_col]], voter_matrix[[policy_col]])
  )
}

# Immigration items: each column becomes 1 when its answer equals the value
# listed for it. For the items in immi_masked, rows whose label contains
# 'Asked' are additionally set to NA.
immi_answers <- c(
  immi_policy1 = "No",
  immi_policy3 = "No",
  immi_policy6 = "No",
  immi_policy2 = "Yes",
  immi_policy7 = "Yes",
  immi_policy4 = "Yes",
  immi_policy5 = "Yes",
  immi_policy8 = "Yes"
)
immi_masked <- c("immi_policy6", "immi_policy4", "immi_policy5", "immi_policy8")
for (immi_col in names(immi_answers)) {
  immi_lab  <- voter_matrix[[immi_col]]
  immi_flag <- as.numeric(immi_lab == immi_answers[[immi_col]])
  if (immi_col %in% immi_masked) {
    immi_flag[grep("Asked", immi_lab)] <- NA
  }
  voter_matrix[[immi_col]] <- immi_flag
}

# Gun items: 1 when the label contains the pattern listed for the column.
guns_patterns <- c(
  guns_policy1 = "Oppose",
  guns_policy3 = "Oppose",
  guns_policy2 = "Support",
  guns_policy4 = "Support"
)
for (guns_col in names(guns_patterns)) {
  voter_matrix[[guns_col]] <- as.numeric(
    grepl(guns_patterns[[guns_col]], voter_matrix[[guns_col]])
  )
}

# Roll-call items: each column becomes 1 when its label contains the pattern
# listed for it. For the items in roll_masked, rows whose label contains
# 'Asked' are additionally set to NA.
#voter_matrix$roll_medicar = as.numeric(grepl(voter_matrix$roll_medicar,pattern='Against'))
roll_patterns <- c(
  roll_garland = "For",
  roll_usa_fre = "For",
  roll_iransct = "For",
  roll_medicar = "For",
  roll_rpl_aca = "For",
  roll_trd_adj = "Against",
  roll_tpp_act = "Against",
  roll_infrast = "Against",
  roll_educrfr = "Against",
  roll_minwage = "Against"
)
roll_masked <- c("roll_garland", "roll_usa_fre", "roll_trd_adj")
for (roll_col in names(roll_patterns)) {
  roll_lab  <- voter_matrix[[roll_col]]
  roll_flag <- as.numeric(grepl(roll_patterns[[roll_col]], roll_lab))
  if (roll_col %in% roll_masked) {
    roll_flag[grep("Asked", roll_lab)] <- NA
  }
  voter_matrix[[roll_col]] <- roll_flag
}

# self_place_attempt: 0 when the self-placement label contains 'Not sure' or
# 'Skipped', 1 otherwise. Computed before self_place is recoded below.
self_place_lab <- voter_matrix$self_place
voter_matrix$self_place_attempt <- as.numeric(
  !(grepl("Not sure", self_place_lab) | grepl("Skipped", self_place_lab))
)

# Recode the three placement items to a -3..3 score. The order of the steps
# matters: the 'Very ...' and 'Somewhat ...' labels are replaced first so the
# bare 'Liberal' / 'Conservative' patterns only hit what is left.
# 'Middle', 'Not sure' and 'Skipped' are all coded 0.
for (place_col in c("self_place", "dem_place", "rep_place")) {
  place <- voter_matrix[[place_col]]
  place[grep("Very Liberal", place)]     <- -3
  place[grep("Somewhat Liberal", place)] <- -1
  place[grep("Liberal", place)]          <- -2
  place[which(
    grepl("Middle", place) | grepl("Not sure", place) | grepl("Skipped", place)
  )] <- 0
  place[grep("Very Conservative", place)]     <- 3
  place[grep("Somewhat Conservative", place)] <- 1
  place[grep("Conservative", place)]          <- 2
  voter_matrix[[place_col]] <- as.numeric(place)
}

# sophistication: additive score made of
#   * one point each when the house / senate majority answer contains
#     'Republicans';
#   * one point per office (governor, two senators, house member) when the
#     respondent's answer and the recorded party contain the same party name;
#   * one point when dem_place <= rep_place.
knowledge_score <-
  grepl("Republicans", voter_matrix$us_house_majority) +
  grepl("Republicans", voter_matrix$us_senate_majority)

office_pairs <- list(
  list(answer = voter_matrix$know_gov,     actual = voter_matrix$party_gov),
  list(answer = voter_matrix$know_senate1, actual = voter_matrix$party_senate1),
  list(answer = voter_matrix$know_senate2, actual = voter_matrix$party_senate2),
  list(answer = voter_matrix$know_house,   actual = voter_matrix$party_house)
)
for (office in office_pairs) {
  for (party_name in c("Republican", "Democrat", "Independent")) {
    knowledge_score <- knowledge_score + as.numeric(
      grepl(party_name, office$answer) & grepl(party_name, office$actual)
    )
  }
}

knowledge_score <- knowledge_score +
  as.numeric(voter_matrix$dem_place <= voter_matrix$rep_place)
voter_matrix$sophistication <- knowledge_score
# Recode the news-interest label to a 0-4 score; "Don't know" and "Skipped"
# are both coded 0. Patterns are applied in the order listed.
newsint_codes <- c(
  "Most of the time"  = 4,
  "Some of the time"  = 3,
  "Only now and then" = 2,
  "Hardly at all"     = 1,
  "Don't know"        = 0,
  "Skipped"           = 0
)
for (newsint_pattern in names(newsint_codes)) {
  newsint_hits <- grep(newsint_pattern, voter_matrix$newsint)
  voter_matrix$newsint[newsint_hits] <- newsint_codes[[newsint_pattern]]
}
voter_matrix$newsint <- as.numeric(voter_matrix$newsint)


# do.impute
#do.impute=F
#if(do.impute==T){
library(mice)
# Covariates handed to the imputation step. 'pre_pid7' must stay LAST: it is
# split off into data_mat_pid below and is not imputed.
covs <- c(
	'male','female','white','black','hisps','educ','edloan','single','divorced','married','reg','vote12','vote12_obama','primary',
	#'state',
	'retired','unemploy','employ','job_attorney','job_teacher','job_citycouncil','job_factory','job_ceo','job_farmer','job_polstaffer',
	'job_smallbusiness','job_military','religion_none','religion_catholic','religion_protestant','religion_evangelical','born_again','unionhh',
	'faminc','investor','queer_identity','health_insured','aca_insured','media_interest','socialmedia_interest',
	'mip_guns','mip_abortion','mip_taxes','mip_immigration','mip_deficit','mip_defense','mip_socsec','mip_env','mip_jobs','mip_crime','mip_natlsecurity',
	'mip_race','mip_healthcare','mip_gays','mip_govtcorruption','mip_religion',
	'envs_policy1','envs_policy2','envs_policy3','envs_policy4','crme_policy1','crme_policy2','crme_policy3','crme_policy4',
	'abrt_policy1','abrt_policy2','abrt_policy3','abrt_policy4','abrt_policy5','abrt_policy6','gays_policy1',
	'immi_policy1','immi_policy3','immi_policy6','immi_policy2','immi_policy7','immi_policy4','immi_policy5','immi_policy8',
	'guns_policy1','guns_policy3','guns_policy2','guns_policy4',
	'roll_garland','roll_usa_fre','roll_iransct','roll_medicar','roll_rpl_aca','roll_trd_adj','roll_tpp_act','roll_infrast','roll_educrfr',
	'roll_minwage','sophistication',
	#'know_gov','know_senate1','know_senate2','know_house',
	'self_place','dem_place','rep_place','newsint','pre_pid7'
)

# Column positions of the covariates in voter_matrix. A missing name is
# reported explicitly instead of failing with "replacement has length zero".
ix <- match(covs, names(voter_matrix))
if (anyNA(ix)) {
	stop("covariates missing from voter_matrix: ",
		paste(covs[is.na(ix)], collapse = ", "), call. = FALSE)
}

# Covariate matrix; the last column (pre_pid7) is split off and dropped.
data_mat <- as.matrix(voter_matrix[, ix])
data_mat_pid <- data_mat[, ncol(data_mat)]
data_mat <- data_mat[, -ncol(data_mat)]

# Respondent id for every stacked row (cces_stacked$indx repeated twice).
candidate_matrix$respondent <- c(cces_stacked$indx, cces_stacked$indx)

un_respondent <- unique(candidate_matrix$respondent)

# One row per unique respondent, taken from that respondent's first stacked
# row. match() finds all first occurrences in a single pass, replacing a
# which() scan per respondent.
first_row <- match(un_respondent, candidate_matrix$respondent)
un_pid <- unname(data_mat_pid[first_row])
un_mat <- unname(data_mat[first_row, , drop = FALSE])

# Convert the character matrix to numeric as a whole. Assigning as.numeric()
# results column-by-column into a character matrix coerces them straight back
# to character, so that form never actually produced numeric storage.
storage.mode(un_mat) <- "double"
#library(matrixStats)
#which(colVars(un_mat,na.rm=T)==0)

# Impute missing covariate values once and cache the result: if the cache file
# exists, `xout` is loaded from it; otherwise it is produced by mice
# (m = 1, maxit = 40) and saved.
impute_cache <- 'data/imputeVoterMat.Rdata'
if (file.exists(impute_cache)) {
	load(impute_cache)
} else {
	# fills in a few missing items on covariates
	un_mat <- as.data.frame(un_mat)
	un_mat[] <- lapply(un_mat, function(column) as.numeric(as.character(column)))
	imout <- mice(un_mat, m = 1, maxit = 40)
	xout <- complete(imout)
	save(xout, file = impute_cache)
}

# Copy each respondent's imputed covariates (row j of xout belongs to
# un_respondent[j]) onto every stacked row of that respondent, then write the
# covariate block back into voter_matrix.
un_respondent <- unique(candidate_matrix$respondent)

# Guard against a stale cache: with a different number of rows the imputed
# values would be attached to the wrong respondents (or silently become NA).
stopifnot(
	nrow(xout) == length(un_respondent),
	ncol(xout) <= ncol(data_mat)
)

respondent_row <- match(candidate_matrix$respondent, un_respondent)
imputed_cols <- seq_len(ncol(xout))
data_mat[seq_along(respondent_row), imputed_cols] <-
	as.matrix(xout)[respondent_row, imputed_cols, drop = FALSE]

# data_mat holds every covariate in ix except the trailing pre_pid7 column, so
# take its width from data_mat instead of hard-coding 97.
voter_matrix[, ix[seq_len(ncol(data_mat))]] <- data_mat

detach(cces_stacked)

###########################################
###Then, need to dummy out all levels (will omit one level of each later in models, but dummying all out now so can make that choice later)

# Each loop below adds one 1/0 column per attribute level (exact label match).

#gender
gender_levels <- c(g_male = "Male", g_female = "Female")
for (dummy_col in names(gender_levels)) {
  candidate_matrix[[dummy_col]] <-
    ifelse(candidate_matrix$gender == gender_levels[[dummy_col]], 1, 0)
}

#race
race_levels <- c(re_black = "Black", re_hispanic = "Hispanic", re_white = "White")
for (dummy_col in names(race_levels)) {
  candidate_matrix[[dummy_col]] <-
    ifelse(candidate_matrix$race == race_levels[[dummy_col]], 1, 0)
}

#religion
religion_levels <- c(
  r_catholic    = "Catholic",
  r_evangelical = "Evangelical Protestant",
  r_none        = "None Listed",
  r_protestant  = "Protestant"
)
for (dummy_col in names(religion_levels)) {
  candidate_matrix[[dummy_col]] <-
    ifelse(candidate_matrix$religion == religion_levels[[dummy_col]], 1, 0)
}

#occupation
# One 1/0 column per occupation level (exact label match).
occupation_levels <- c(
  o_attorney         = "Attorney",
  o_ceo              = "CEO",
  o_citycouncil      = "City Council Member",
  o_factoryforeman   = "Factory Foreman",
  o_farmer           = "Farmer",
  o_usarmymajor      = "Former US Army Major",
  o_politicalstaffer = "Political Staffer",
  o_smallbizowner    = "Small Business Owner",
  o_stateleg         = "State Legislator",
  o_teacher          = "Teacher"
)
for (dummy_col in names(occupation_levels)) {
  candidate_matrix[[dummy_col]] <-
    ifelse(candidate_matrix$occupation == occupation_levels[[dummy_col]], 1, 0)
}

#personality
# One 1/0 column per personality level (exact label match).
personality_levels <- c(
  p_compassionate = "Compassionate",
  p_decent        = "Decent",
  p_empathetic    = "Empathetic",
  p_inspiring     = "Inspiring",
  p_intelligent   = "Intelligent",
  p_knowledgeable = "Knowledgeable",
  p_moral         = "Moral",
  p_strongleader  = "Strong Leader"
)
for (dummy_col in names(personality_levels)) {
  candidate_matrix[[dummy_col]] <-
    ifelse(candidate_matrix$personality == personality_levels[[dummy_col]], 1, 0)
}

#endorsements
# One 1/0 column per endorsing group (exact match on the full label).
endorsement_levels <- c(
  e_business     = "Business groups (Chamber of Commerce, Small Business Associations)",
  e_christian    = "Christian groups (Family Research Council, Focus on the Family)",
  e_civilrights  = "Civil rights groups (key figures in the NAACP and Urban League)",
  e_energy       = "Energy groups (American Petroleum and Mining Associations)",
  e_environment  = "Environmental groups (Sierra Club, Natural Resources Defense Council)",
  e_guncontrol   = "Gun control groups (Coalition to Stop Gun Violence, Brady Campaign)",
  e_gunrights    = "Gun rights groups (NRA, Gun Owners of America)",
  e_laborunions  = "Labor unions (AFL-CIO, SEIU)",
  e_newspapers   = "Major area newspapers (Tribune, Herald)",
  e_reproductive = "Reproductive rights groups (Planned Parenthood, NARAL)",
  e_taxreform    = "Tax reform groups (Club for Growth, Americans for Tax Reform)",
  e_teaparty     = "Tea Party groups (FreedomWorks, Tea Party Patriots)",
  e_veterans     = "Veterans groups (American Legion, American Veterans)"
)
for (dummy_col in names(endorsement_levels)) {
  candidate_matrix[[dummy_col]] <-
    ifelse(candidate_matrix$endorsements == endorsement_levels[[dummy_col]], 1, 0)
}

#record
# One 0/1 indicator per record statement: rec_<stem> is 1 where the record
# column equals the full statement text, 0 otherwise (NA stays NA).
# local() keeps the lookup table and loop variable out of the workspace.
candidate_matrix <- local({
  record_statements <- c(
    rec_help   = "Help my constituents get the benefits they deserve",
    rec_refuse = "Refuse to compromise my principles even when it means taking on my party",
    rec_secure = "Secure appointment to a powerful legislative committee",
    rec_stand  = "Stand with my party to do what's right",
    rec_work   = "Work across the aisle to get things done"
  )
  shown_record <- candidate_matrix$record
  for (flag in names(record_statements)) {
    candidate_matrix[[flag]] <- ifelse(
      shown_record == record_statements[[flag]], 1, 0
    )
  }
  candidate_matrix
})

#issue1 and issue2
# The issue1 and issue2 columns are compared against the same 20 statement
# strings. For each slot and statement, <slot>_<stem> is 1 where the slot's
# column equals the statement, 0 otherwise (NA stays NA).
# All i1_* columns are appended first, then all i2_* columns.
# local() keeps the lookup tables and loop variables out of the workspace.
candidate_matrix <- local({
  issue_statements <- c(
    cuttaxes           = "Cut taxes on income and capital gains for all",
    lgbt               = "Defend the rights of LGBT individuals",
    marriage           = "Defend traditional marriage and religious beliefs",
    drilling           = "Expand domestic oil and gas production through drilling",
    need               = "Expand government and unemployment assistance for those in need",
    govabuse           = "Prevent and prosecute abuse of government assistance programs",
    freetrade          = "Promote expanding free trade agreements",
    righttochoose      = "Protect a woman's right to choose",
    gunrights          = "Protect gun owners' rights to defend themselves and others",
    unfairtrade        = "Protect jobs and industry from unfair foreign trade",
    unbornlives        = "Protect the lives of the unborn",
    citizenship        = "Provide a path to citizenship for undocumented immigrants",
    raisetaxes         = "Raise taxes on those making more than $250,000 a year",
    reducemilitary     = "Reduce the size of military and number of military bases",
    policing           = "Reform policing and stop racial profiling",
    co2emissions       = "Regulate CO2 emissions to combat global warming",
    bordersecurity     = "Strengthen border security to stop illegal immigration",
    guncontrol         = "Strengthen gun control through commonsense restrictions",
    strengthenmilitary = "Strengthen our military and national defense",
    criminals          = "Toughen sentences and penalties for criminals"
  )
  # Column prefix -> the attribute values shown in that issue slot.
  issue_slots <- list(
    i1 = candidate_matrix$issue1,
    i2 = candidate_matrix$issue2
  )
  for (slot in names(issue_slots)) {
    for (stem in names(issue_statements)) {
      candidate_matrix[[paste0(slot, "_", stem)]] <- ifelse(
        issue_slots[[slot]] == issue_statements[[stem]], 1, 0
      )
    }
  }
  candidate_matrix
})

##Either/Or Issues

# i_<stem> is 1 when the statement appears in either issue slot, i.e. when
# i1_<stem> or i2_<stem> equals 1, and 0 otherwise.
# local() keeps the stem list and loop variables out of the workspace.
candidate_matrix <- local({
  issue_stems <- c(
    "cuttaxes", "lgbt", "marriage", "drilling", "need",
    "govabuse", "freetrade", "righttochoose", "gunrights", "unfairtrade",
    "unbornlives", "citizenship", "raisetaxes", "reducemilitary", "policing",
    "co2emissions", "bordersecurity", "guncontrol", "strengthenmilitary",
    "criminals"
  )
  for (stem in issue_stems) {
    in_slot1 <- candidate_matrix[[paste0("i1_", stem)]] == 1
    in_slot2 <- candidate_matrix[[paste0("i2_", stem)]] == 1
    candidate_matrix[[paste0("i_", stem)]] <- ifelse(in_slot1 | in_slot2, 1, 0)
  }
  candidate_matrix
})

# Uniform weights given the design: every row of candidate_matrix gets wt = 1.
candidate_matrix$wt <- 1

# Export both matrices as CSV. Paths are relative to the working directory.
# write.csv defaults apply, so row names are written as the first column.
write.csv(voter_matrix, file = "data/voter_matrix.csv")
write.csv(candidate_matrix, file = "data/candidate_matrix.csv")

# trying to keep everything together in the data
# -- cces_stacked in data/cces_stacked_unmatched.Rdata
# -- voter_matrix in data/voter_matrix.csv
# -- candidate_matrix in data/candidate_matrix.csv

# END build initial matrices

# these are objects produced directly from cces modules that are provided as-is, in lieu of said modules
#cces_data_unmatched.Rdata
#cces_stacked_unmatched.Rdata
#screeners_matched_JAH_UCM.Rdata
#screeners_unmatched_JAH_UCM.Rdata
#candidate_matrix
#voter_matrix

# END
